Jones_1984 <- read.csv("data/raw/Jones-1984.csv")
Ammon_1996 <- read.csv("data/raw/Ammon-1996.csv")
Yelland_2008 <- read.csv("data/raw/Yelland-2008.csv")
Emberger_2023 <- read.csv("data/raw/Thierauf-Emberger-2023.csv")
Wilkinson_IV <- read.csv("data/raw/Wilkinson-1976-IV.csv")
Wilkinson_PO <- read.csv("data/raw/Wilkinson-1976-PO.csv")
Vestal_1977_elderly <- read.csv("data/raw/Vestal-1977-elderly.csv")
Vestal_1977_young <- read.csv("data/raw/Vestal-1977-young.csv")
standard_theme <- theme_bw() +
  theme(plot.title = element_text(size = 30, hjust = 0.5),
    plot.subtitle = element_text(size = 25, hjust = 0.5),
    plot.caption = element_text(size = 20),
    axis.title = element_text(size = 25),
    axis.text.y = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    legend.title = element_text(size = 20),
    legend.text = element_text(size = 22),
    strip.text = element_text(size = 22))
theme_set(standard_theme)
set_flextable_defaults(table.layout = "autofit")

1. Унификация данных

Ammon_1996_clean <- Ammon_1996 %>% 
  mutate(case = paste(subject_id, occasion, sep = "-")) %>% 
  mutate(route = route %>% str_remove_all(" infusion")) %>% 
  rename(infusion_duration_min = duration_inf_min)
rm(Ammon_1996)
Jones_1984_clean <- Jones_1984 %>% 
  mutate(subject_id = paste("J", subject_id, sep = "-"),
         sex = case_when(sex == "male" ~ "M",
                         sex == "female" ~ "F"))
rm(Jones_1984)
Yelland_2008_clean <- Yelland_2008 %>% 
  mutate(case = paste(subject_id, occasion, sep = "-"))
rm(Yelland_2008)
Emberger_2023_clean <- Emberger_2023 %>% 
  filter(!is.na(conc_g_per_L)) %>% 
  rename(wt_kg = WT_kg,
         sex = Sex,
         age = Age,
         bh_cm = BH_cm,
         bmi = BMI) %>% 
  mutate(subject_id = str_c("TE", "-", subject_id))
rm(Emberger_2023)
Wilkinson_IV_clean <- Wilkinson_IV %>% 
  filter(!is.na(conc_g_per_L)) %>% 
  mutate(route = route %>% str_remove_all(" infusion")) %>% 
  rename(wt_kg = WT_kg,
         sex = Sex)
rm(Wilkinson_IV)
Wilkinson_PO_clean <- Wilkinson_PO %>% 
  filter(!is.na(conc_g_per_L)) %>% 
  mutate(case = paste(subject_id, occasion, sep = "-")) %>% 
  rename(wt_kg = WT_kg,
         sex = Sex,
         bh_cm = BH_cm,
         bsa_m2 = BSA_m2,
         age = Age)  
rm(Wilkinson_PO)
Vestal_1977_e_pk <- Vestal_1977_elderly %>% 
rename_with(tolower) %>% 
  rename(Cmax = cmax_mg_dl) %>% 
  mutate(Cmax = Cmax / 100,
         route = case_when(
           str_detect(str_to_lower(route), "iv") ~ "IV",
           str_detect(str_to_lower(route), "po") ~ "PO"),
         subject_id = str_c("V-E-", subject_id))
rm(Vestal_1977_elderly)
Vestal_1977_y_pk <- Vestal_1977_young %>% 
rename_with(tolower) %>% 
  rename(Cmax = cmax_mg_dl) %>% 
  mutate(Cmax = Cmax / 100,
         route = case_when(
           str_detect(str_to_lower(route), "iv") ~ "IV",
           str_detect(str_to_lower(route), "po") ~ "PO"),
         subject_id = str_c("V-Y-", subject_id))  
rm(Vestal_1977_young)

2. Графики по датасетам

2.1 Графики индивидуальных кривых

Ammon_1996_clean %>% 
  select(subject_id, conc_g_per_L, time_min, occasion) %>% 
  mutate(occasion = occasion %>% as.factor) %>% 
  ggplot(aes(color = occasion)) +
  geom_line(aes(x = time_min, y = conc_g_per_L, group = occasion), linewidth = 2) +
  facet_wrap(subject_id~.)+
  scale_x_continuous(breaks = seq(0, 420, by = 50))+
  scale_y_continuous(breaks = seq(0, 1, by = 0.25))+
    labs(title = "Зависимость концентрации от времени", subtitle = "Индивидуальные кривые для каждого субъекта", x = "Время, мин", y = "Концентрация, г/л", color = "Визит", caption = "По данным Ammon-1996")+
  theme(legend.position = "top")

ggplot(Jones_1984_clean, aes(x = time_min, y = conc_g_per_L)) +
  geom_line(aes(group = subject_id)) +
  scale_y_continuous(breaks = seq(0, 1.4, 0.1)) +
  scale_x_continuous(breaks = seq(0, 420, 30)) +
  # coord_cartesian(xlim = c(60, 420)) +
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Индивидуальные кривые для каждого субъекта",
       caption = "По данным Jones-1984",
       x = "Время, мин",
       y = "Концентрация, г/л")

Yelland_2008_clean %>% 
  select(subject_id, conc_g_per_L, time_min, occasion) %>% 
  mutate(occasion = occasion %>% as.factor) %>% 
  ggplot(aes(color = occasion)) +
  geom_line(aes(x = time_min, y = conc_g_per_L, group = occasion), linewidth = 2) +
  facet_wrap(subject_id~.)+
  scale_x_continuous(breaks = seq(0, 420, by = 50))+
  scale_y_continuous(breaks = seq(0, 1, by = 0.25))+
    labs(title = "Зависимость концентрации от времени", subtitle = "Индивидуальные кривые для каждого субъекта", x = "Время, мин", y = "Концентрация, г/л", color = "Визит", caption = "По данным Yelland-2008")+
  theme(legend.position = "top")

Emberger_2023_clean %>% 
  select(subject_id, conc_g_per_L, time_min) %>% 
  mutate(subject_id = subject_id %>% str_remove_all("TE-")) %>%
  ggplot(aes(color = subject_id)) +
  geom_line(aes(x = time_min, y = conc_g_per_L, group = subject_id), linewidth = 2) +
  scale_x_continuous(breaks = seq(0, 420, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +
  scale_color_discrete(breaks = as.character(1:10)) +
    labs(title = "Зависимость концентрации от времени",
         subtitle = "Индивидуальные кривые для каждого субъекта", 
         x = "Время, мин", y = "Концентрация, г/л",
         color = "Субъект", 
         caption = "По данным Thierauf-Emberger-2023") +
  theme(legend.position = "top")

ggplot(Wilkinson_IV_clean, aes(x = time_min, y = conc_g_per_L)) +
  geom_line(aes(group = subject_id)) +
  scale_y_continuous(breaks = seq(0, 1.4, 0.1)) +
  scale_x_continuous(breaks = seq(0, 510, 30)) +
  # coord_cartesian(xlim = c(60, 420)) +
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Индивидуальные кривые для каждого субъекта",
       caption = "По данным Wilkinson-1976-IV",
       x = "Время, мин",
       y = "Концентрация, г/л")

Wilkinson_PO_clean %>%
  select(subject_id, conc_g_per_L, time_min, occasion) %>% 
  mutate(occasion = occasion %>% as.factor) %>% 
  ggplot(aes(color = occasion)) +
  geom_line(aes(x = time_min, y = conc_g_per_L, group = occasion), linewidth = 2) +
  facet_wrap(subject_id~.)+
  scale_x_continuous(breaks = seq(0, 420, by = 50))+
  scale_y_continuous(breaks = seq(0, 1, by = 0.25))+
    labs(title = "Зависимость концентрации от времени", subtitle = "Индивидуальные кривые для каждого субъекта", x = "Время, мин", y = "Концентрация, г/л", color = "Визит", caption = "По данным Wilkinson-1976-PO")+
  geom_hline(aes(yintercept = 0.2)) +
  theme(legend.position = "top")

2.2 Среднеее и стандартное отклонение концентрации по времени

Время усреднено

Ammon_1996_clean %>% 
  group_by(case) %>%
  mutate(point_id = row_number() %>% as.factor) %>% 
  ungroup() %>%
  mutate(occasion = occasion %>% as.factor) %>% 
  group_by(occasion, point_id) %>% 
  summarise(conc_avg = mean(conc_g_per_L), conc_max = mean(conc_g_per_L) + sd(conc_g_per_L), conc_min = mean(conc_g_per_L) - sd(conc_g_per_L), time_min_avg = mean(time_min)) %>% 
  ungroup() %>% 
  ggplot() +
  geom_line(aes(x = time_min_avg, y = conc_avg, group = occasion, color = occasion)) +
  geom_errorbar(aes(x = time_min_avg, ymin = conc_min, ymax = conc_max), width = 6, linewidth = 0.5) +
  scale_x_continuous(breaks = seq(0, 400, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +    
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Mean ± SD",
       x = "Время, мин",
       y = "Концентрация, г/л",
       color = "Визит",
       caption = "По данным Ammon-1996")

Jones_1984_clean %>% 
  group_by(subject_id) %>%
  mutate(point_id = row_number() %>% as.factor) %>% 
  ungroup() %>% 
  group_by(point_id) %>% 
  summarise(conc_avg = mean(conc_g_per_L), conc_max = mean(conc_g_per_L) + sd(conc_g_per_L), conc_min = mean(conc_g_per_L) - sd(conc_g_per_L), time_min_avg = mean(time_min)) %>% 
  ggplot() +
  geom_line(aes(x = time_min_avg, y = conc_avg)) +
  geom_errorbar(aes(x = time_min_avg, ymin = conc_min, ymax = conc_max), width = 6, linewidth = 0.5) +
  scale_x_continuous(breaks = seq(0, 400, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +    
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Mean ± SD",
       x = "Время, мин",
       y = "Концентрация, г/л",
       caption = "По данным Jones-1984")

Yelland_2008_clean %>%
  group_by(case) %>%
  mutate(point_id = row_number() %>% as.factor) %>% 
  ungroup() %>%
  mutate(occasion = occasion %>% as.factor) %>% 
  group_by(occasion, point_id) %>% 
  summarise(conc_avg = mean(conc_g_per_L), conc_max = mean(conc_g_per_L) + sd(conc_g_per_L), conc_min = mean(conc_g_per_L) - sd(conc_g_per_L), time_min_avg = mean(time_min)) %>% 
  ungroup() %>% 
  ggplot() +
  geom_line(aes(x = time_min_avg, y = conc_avg, group = occasion, color = occasion)) +
  geom_errorbar(aes(x = time_min_avg, ymin = conc_min, ymax = conc_max), width = 6, linewidth = 0.5) +
  scale_x_continuous(breaks = seq(0, 400, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +    
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Mean ± SD",
       x = "Время, мин",
       y = "Концентрация, г/л",
       color = "Визит",
       caption = "По данным Yelland-2008")

Emberger_2023_clean %>% 
  group_by(subject_id) %>%
  mutate(point_id = row_number() %>% as.factor) %>% 
  ungroup() %>% 
  group_by(point_id) %>% 
  summarise(conc_avg = mean(conc_g_per_L), conc_max = mean(conc_g_per_L) + sd(conc_g_per_L), conc_min = mean(conc_g_per_L) - sd(conc_g_per_L), time_min_avg = mean(time_min)) %>% 
  ggplot() +
  geom_line(aes(x = time_min_avg, y = conc_avg)) +
  geom_errorbar(aes(x = time_min_avg, ymin = conc_min, ymax = conc_max), width = 6, linewidth = 0.5) +
  scale_x_continuous(breaks = seq(0, 400, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +    
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Mean ± SD",
       x = "Время, мин",
       y = "Концентрация, г/л",
       caption = "По данным Thierauf-Emberger-2023")

Wilkinson_IV_clean %>% 
  group_by(subject_id) %>%
  mutate(point_id = row_number() %>% as.factor) %>% 
  ungroup() %>% 
  group_by(point_id) %>% 
  summarise(conc_avg = mean(conc_g_per_L), conc_max = mean(conc_g_per_L) + sd(conc_g_per_L), conc_min = mean(conc_g_per_L) - sd(conc_g_per_L), time_min_avg = mean(time_min)) %>% 
  filter(as.numeric(point_id) < 27) %>% 
  ggplot() +
  geom_line(aes(x = time_min_avg, y = conc_avg)) +
  geom_errorbar(aes(x = time_min_avg, ymin = conc_min, ymax = conc_max), width = 6, linewidth = 0.5) +
  scale_x_continuous(breaks = seq(0, 400, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +    
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Mean ± SD",
       x = "Время, мин",
       y = "Концентрация, г/л",
       caption = "По данным Wilkinson-1976-IV")  

Wilkinson_PO_clean %>%
  group_by(case) %>%
  mutate(point_id = row_number() %>% as.factor) %>% 
  ungroup() %>% 
  group_by(point_id) %>% 
  summarise(conc_avg = mean(conc_g_per_L), conc_max = mean(conc_g_per_L) + sd(conc_g_per_L), conc_min = mean(conc_g_per_L) - sd(conc_g_per_L), time_min_avg = mean(time_min)) %>% 
  ggplot() +
  geom_line(aes(x = time_min_avg, y = conc_avg)) +
  geom_errorbar(aes(x = time_min_avg, ymin = conc_min, ymax = conc_max), width = 6, linewidth = 0.5) +
  scale_x_continuous(breaks = seq(0, 400, by = 50)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25)) +    
  labs(title = "Зависимость концентрации от времени",
       subtitle = "Mean ± SD",
       x = "Время, мин",
       y = "Концентрация, г/л",
       caption = "По данным Wilkinson-1976-PO")  

3. Расчет ФК параметров

#k - это насколько мы прибавляем к индексу tmax
pharma_fun <- function(conc, t, dose, k = 1) {
  # Моделирование для подсчёта beta, C0, Vd 0 порядка
  t_sample <- {{t}}[(first(which.max({{conc}})) + k):length({{t}})]
  conc_sample <- conc[(first(which.max({{conc}})) + k):length({{conc}})]
  dose <- first({{dose}})
  model <- lm(conc_sample ~ t_sample)
  Beta <- -as.numeric(model$coefficients[2])
  C_0_0_order <- as.numeric(model$coefficients[1])
  V_d_0_order <- dose / C_0_0_order
  
  # Моделирование для подсчёта kel, C0, Vd 1 порядка
  indexes <- conc_sample > 0
  model_1 <- lm(log(conc_sample[indexes]) ~ t_sample[indexes])
  Kel <- -as.numeric(model_1$coefficients[2])
  C_0_1_order <- exp(as.numeric(model_1$coefficients[1]))
  V_d_1_order <- dose / C_0_1_order
  
  # Подсчёт AUC0-t
  
  # if (length({{t}}) < 2) AUC <- NA
  # if (anyNA({{t}}) | anyNA({{conc}})) AUC <- NaN
  t0 = {{t}}[-length({{t}})]
  t1 = {{t}}[-1]
  c0 = {{conc}}[-length({{conc}})]
  c1 = {{conc}}[-1]
  AUC = sum(((c0 + c1)/2)*(t1 - t0))
  
  # Подсчёт Cmax
  cmax <- max(conc)
  
  # Подсчёт Tmax
  tmax <- t[first(which.max(conc))]
  
  # Подсчёт CL
  cl = dose/AUC #l/(kg*min)
  # V = CL/Kel 
  
  return(list(Beta = Beta,
              Kel = Kel,
              C_0_0_order = C_0_0_order,
              V_d_0_order = V_d_0_order,
              C_0_1_order = C_0_1_order,
              V_d_1_order = V_d_1_order,
              AUC = AUC,
              Cmax = cmax,
              Tmax = tmax,
              CL = cl))
}

stat_ph <- list(
        "Среднее" = ~mean(., na.rm = TRUE) %>% round(4),
        "Медиана" = ~median(., na.rm = TRUE)%>% round(4),
        "Стандартное отклонение"   = ~sd(., na.rm = TRUE) %>% round(4),
        "Коэффициент вариации (CV), %" = ~round((sd(., na.rm = TRUE))/(mean(., na.rm = TRUE)) * 100, 2)
        )
Jones_1984_pk <- Jones_1984_clean %>% 
  arrange(time_min) %>% 
  nest(.by = subject_id, .key = "subject_data") %>% 
  mutate(pharma = purrr::map(subject_data,
                            ~pharma_fun(.$conc_g_per_L,
                                        .$time_min,
                                        .$dose_g_per_kg))) %>% 
  unnest_wider(col = pharma) %>% 
  select(-subject_data)

Jones_1984_pk <- Jones_1984_clean %>% 
  select(-c(time_min, conc_g_per_L)) %>% 
  group_by(subject_id) %>% 
  slice(1) %>% 
  ungroup() %>% 
  right_join(Jones_1984_pk)
## Joining with `by = join_by(subject_id)`
rm(Jones_1984_clean)
Ammon_1996_pk <- Ammon_1996_clean %>% 
  arrange(time_min) %>% 
  nest(.by = case, .key = "subject_data") %>% 
  mutate(pharma = purrr::map(subject_data,
                            ~pharma_fun(.$conc_g_per_L,
                                        .$time_min,
                                        .$dose_g_per_kg))) %>% 
  unnest_wider(col = pharma) %>% 
  select(-subject_data)

Ammon_1996_pk <- Ammon_1996_clean %>%
  select(-c(time_min, conc_g_per_L)) %>%
  group_by(case) %>%
  slice(1) %>%
  ungroup() %>%
  right_join(Ammon_1996_pk, by = join_by(case)) %>% 
  mutate(subject_id = str_c("A-IV-", subject_id)) %>% 
  select(-case)
rm(Ammon_1996_clean)
Yelland_2008_pk <- Yelland_2008_clean %>% 
  arrange(time_min) %>% 
  nest(.by = case, .key = "subject_data") %>% 
  mutate(pharma = purrr::map(subject_data,
                            ~pharma_fun(.$conc_g_per_L,
                                        .$time_min,
                                        .$dose_g_per_kg))) %>% 
  unnest_wider(col = pharma) %>%
  select(-subject_data)

Yelland_2008_pk <- Yelland_2008_clean %>%
  select(-c(time_min, conc_g_per_L)) %>%
  group_by(case) %>%
  slice(1) %>%
  ungroup() %>%
  right_join(Yelland_2008_pk, by = join_by(case)) %>%
  mutate(subject_id = str_c("Y-", subject_id)) %>% 
  select(-case)
rm(Yelland_2008_clean)
Emberger_2023_pk <- Emberger_2023_clean %>% 
  arrange(time_min) %>% 
  nest(.by = subject_id, .key = "subject_data") %>% 
  mutate(pharma = purrr::map(subject_data,
                            ~pharma_fun(.$conc_g_per_L,
                                        .$time_min,
                                        .$dose_g_per_kg))) %>% 
  unnest_wider(col = pharma) %>% 
  select(-subject_data)

Emberger_2023_pk <- Emberger_2023_clean %>% 
  select(-c(time_min, conc_g_per_L)) %>% 
  group_by(subject_id) %>% 
  slice(1) %>% 
  ungroup() %>% 
  right_join(Emberger_2023_pk)
## Joining with `by = join_by(subject_id)`
rm(Emberger_2023_clean)
Wilkinson_IV_pk <- Wilkinson_IV_clean %>% 
  arrange(time_min) %>% 
  nest(.by = subject_id, .key = "subject_data") %>% 
  mutate(pharma = purrr::map(subject_data,
                            ~pharma_fun(.$conc_g_per_L,
                                        .$time_min,
                                        .$dose_g_per_kg))) %>% 
  unnest_wider(col = pharma) %>% 
  select(-subject_data)

Wilkinson_IV_pk <- Wilkinson_IV_clean %>% 
  select(-c(time_min, conc_g_per_L)) %>% 
  group_by(subject_id) %>% 
  slice(1) %>% 
  ungroup() %>% 
  right_join(Wilkinson_IV_pk) %>% 
  mutate(subject_id = str_c("W-IV-", subject_id))
## Joining with `by = join_by(subject_id)`
rm(Wilkinson_IV_clean)
Wilkinson_PO_pk <- Wilkinson_PO_clean %>% 
  arrange(time_min) %>% 
  nest(.by = case, .key = "subject_data") %>% 
  mutate(pharma = purrr::map(subject_data,
                            ~pharma_fun(.$conc_g_per_L,
                                        .$time_min,
                                        .$dose_g_per_kg))) %>% 
  unnest_wider(col = pharma) %>% 
  select(-subject_data)

Wilkinson_PO_pk <- Wilkinson_PO_clean %>%
  select(-c(time_min, conc_g_per_L)) %>%
  group_by(case) %>%
  slice(1) %>%
  ungroup() %>%
  right_join(Wilkinson_PO_pk, by = join_by(case)) %>% 
  mutate(subject_id = str_c("W-PO-", subject_id)) %>% 
  select(-case)
rm(Wilkinson_PO_clean)

4. Объединение датасетов

main_df <- bind_rows(Ammon_1996_pk,
                     Emberger_2023_pk,
                     Jones_1984_pk,
                     Wilkinson_IV_pk,
                     Wilkinson_PO_pk,
                     Yelland_2008_pk,
                     Vestal_1977_e_pk,
                     Vestal_1977_y_pk)

main_df <- main_df %>% 
  mutate(across(c(where(is.character), occasion, infusion_duration_min, -subject_id), ~as.factor(.x)))
# Оставил только средние значения по визитам (независимость наблюдений соблюдена)
WILK <- main_df %>% 
  filter(study_id == "Wilkinson-1976-PO") %>% 
  arrange(desc(dose_g_per_kg), .by_group = TRUE) %>% 
  group_by(subject_id) %>% 
  slice(1)
order_0_df <- main_df %>%
  filter(study_id != "Wilkinson-1976-PO") %>%
  group_by(subject_id) %>% 
  mutate(across(c(Beta, C_0_0_order, V_d_0_order, AUC, Cmax, Tmax), ~mean(.x))) %>%
  slice(1) %>% 
  ungroup() %>% 
  select(-occasion) %>% 
  bind_rows(WILK) %>% 
  select(-c(Kel, occasion, contains("1_order"))) 

# nrow(order_0_df)
# length(unique(order_0_df$subject_id))
order_1_df <- main_df %>% filter(Cmax < 0.23)

5. Эксплораторный анализ

5.1 Таблицы

main_df %>% 
  select(-subject_id) %>% 
  gtsummary::tbl_summary()
Characteristic N = 2061
study_id
    Ammon_1996 24 (12%)
    Jones_1984 48 (23%)
    Thierauf_Emberger_2023 10 (4.9%)
    Vestal-1977-elderly 25 (12%)
    Vestal-1977-young 25 (12%)
    Wilkinson-1976-IV 6 (2.9%)
    Wilkinson-1976-PO 32 (16%)
    Yelland_2008 36 (17%)
occasion
    1 32 (35%)
    2 32 (35%)
    3 20 (22%)
    4 8 (8.7%)
    Unknown 114
dose_g_per_kg 0.57 (0.52, 0.68)
route
    IV 80 (39%)
    PO 126 (61%)
infusion_duration_min
    60 74 (93%)
    120 6 (7.5%)
    Unknown 126
food
    fasted 136 (66%)
    fed 70 (34%)
site
    capillary 86 (42%)
    venous 120 (58%)
Beta 0.0022 (0.0019, 0.0026)
    Unknown 50
Kel 0.008 (0.006, 0.012)
    Unknown 50
C_0_0_order 0.93 (0.60, 1.00)
    Unknown 50
V_d_0_order 0.68 (0.60, 0.72)
    Unknown 50
C_0_1_order 1.64 (1.24, 2.15)
    Unknown 50
V_d_1_order 0.33 (0.24, 0.43)
    Unknown 50
AUC 142 (87, 196)
    Unknown 50
Cmax 0.85 (0.65, 1.20)
Tmax 60 (43, 75)
    Unknown 50
CL 0.0039 (0.0033, 0.0051)
    Unknown 50
wt_kg 75 (71, 82)
    Unknown 108
sex
    F 5 (3.4%)
    M 141 (97%)
    Unknown 60
age 32 (23, 61)
    Unknown 114
bh_cm 178 (172, 179)
    Unknown 164
bmi 23.6 (22.3, 25.2)
    Unknown 196
bsa_m2 1.89 (1.83, 2.00)
    Unknown 124
lbm_kg 56 (51, 61)
    Unknown 156
1 n (%); Median (Q1, Q3)
1.  ФК-параметры (endpoints)
•   AUC, Cmax, 
•   C0_0, C0_1
•   Beta, Kel, 
•   Tmax
•   Vd_0,Vd_1
•   CL
2. Условия эксперимента
•   Dose (g/kg)
•   Route (IV/PO)
•   Occasion (1/2/NA)
•   Food (fed/fasted)
•   Infusion_duration_min
•   Study_id
3.  Личные данные пациента
•   WT, BH, BMI, BSA, LBM
•   Sex, Age
label(order_0_df$study_id)   <- "Study"
label(order_0_df$sex)   <- "Sex"
label(order_0_df$dose_g_per_kg)    <- "Dose"
label(order_0_df$food) <- "Food"
label(order_0_df$site) <- "Blood sampling"
label(order_0_df$age) <- "Age"
label(order_0_df$route) <- "Route"

units(order_0_df$dose_g_per_kg)   <- "g per kg"
units(order_0_df$age)   <- "years"

tbl_output_0 <- table1(~ study_id + dose_g_per_kg + route + site + age + sex|food, data = order_0_df, render.missing = NULL, overall=c(left="Total"))


tbl_output_0
Total
(N=146)
fasted
(N=112)
fed
(N=34)
Study
Ammon_1996 12 (8.2%) 0 (0%) 12 (35.3%)
Jones_1984 48 (32.9%) 48 (42.9%) 0 (0%)
Thierauf_Emberger_2023 10 (6.8%) 0 (0%) 10 (29.4%)
Vestal-1977-elderly 25 (17.1%) 25 (22.3%) 0 (0%)
Vestal-1977-young 25 (17.1%) 25 (22.3%) 0 (0%)
Wilkinson-1976-IV 6 (4.1%) 6 (5.4%) 0 (0%)
Wilkinson-1976-PO 8 (5.5%) 8 (7.1%) 0 (0%)
Yelland_2008 12 (8.2%) 0 (0%) 12 (35.3%)
Dose (g per kg)
Mean (SD) 0.596 (0.106) 0.620 (0.0550) 0.517 (0.174)
Median [Min, Max] 0.585 [0.300, 0.700] 0.595 [0.508, 0.683] 0.560 [0.300, 0.700]
Route
IV 68 (46.6%) 56 (50.0%) 12 (35.3%)
PO 78 (53.4%) 56 (50.0%) 22 (64.7%)
Blood sampling
capillary 62 (42.5%) 62 (55.4%) 0 (0%)
venous 84 (57.5%) 50 (44.6%) 34 (100%)
Age (years)
Mean (SD) 48.8 (19.0) 49.2 (19.4) 46.6 (16.9)
Median [Min, Max] 50.7 [20.7, 80.7] 50.7 [20.7, 80.7] 47.5 [28.0, 67.0]
Sex
F 5 (3.4%) 0 (0%) 5 (14.7%)
M 117 (80.1%) 112 (100%) 5 (14.7%)
label(main_df$study_id)   <- "Study"
label(main_df$sex)   <- "Sex"
label(main_df$dose_g_per_kg)    <- "Dose"
label(main_df$food) <- "Food"
label(main_df$site) <- "Blood sampling"
label(main_df$age) <- "Age"
label(main_df$route) <- "Route"

units(main_df$dose_g_per_kg)   <- "g per kg"
units(main_df$age)   <- "years"

tbl_output <- table1(~ study_id + dose_g_per_kg + route + site + age + sex|food, data = main_df, render.missing = NULL, overall=c(left="Total"))


tbl_output
Total
(N=206)
fasted
(N=136)
fed
(N=70)
Study
Ammon_1996 24 (11.7%) 0 (0%) 24 (34.3%)
Jones_1984 48 (23.3%) 48 (35.3%) 0 (0%)
Thierauf_Emberger_2023 10 (4.9%) 0 (0%) 10 (14.3%)
Vestal-1977-elderly 25 (12.1%) 25 (18.4%) 0 (0%)
Vestal-1977-young 25 (12.1%) 25 (18.4%) 0 (0%)
Wilkinson-1976-IV 6 (2.9%) 6 (4.4%) 0 (0%)
Wilkinson-1976-PO 32 (15.5%) 32 (23.5%) 0 (0%)
Yelland_2008 36 (17.5%) 0 (0%) 36 (51.4%)
Dose (g per kg)
Mean (SD) 0.557 (0.157) 0.564 (0.141) 0.543 (0.184)
Median [Min, Max] 0.570 [0.127, 0.700] 0.570 [0.127, 0.683] 0.700 [0.300, 0.700]
Route
IV 80 (38.8%) 56 (41.2%) 24 (34.3%)
PO 126 (61.2%) 80 (58.8%) 46 (65.7%)
Blood sampling
capillary 86 (41.7%) 86 (63.2%) 0 (0%)
venous 120 (58.3%) 50 (36.8%) 70 (100%)
Age (years)
Mean (SD) 42.1 (19.9) 41.6 (20.2) 46.6 (16.9)
Median [Min, Max] 32.3 [20.7, 80.7] 31.3 [20.7, 80.7] 47.5 [28.0, 67.0]
Sex
F 5 (2.4%) 0 (0%) 5 (7.1%)
M 141 (68.4%) 136 (100%) 5 (7.1%)
main_df_long <- main_df %>% 
  select(where(is.numeric)) %>% 
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "value"
  )

ggplot(main_df_long, aes(x = value)) +
  geom_histogram(bins = 30) +
  facet_wrap(~ variable, scales = "free") +
  theme_bw() +
  labs(x = NULL, y = NULL)

ggplot(main_df_long, aes(x = value)) +
  geom_density(na.rm = TRUE) +
  facet_wrap(~ variable, scales = "free") +
  theme_bw() +
  labs(x = NULL, y = NULL)

Можно прологарифмировать переменные с правосторонней ассимметрией Kel, V_d_o, V_d_1, C_0_1_order

main_df_log_long <- main_df %>% 
  mutate(across(c(Kel, V_d_0_order, V_d_1_order, C_0_1_order, AUC, CL), function(x) log(x))) %>% 
  select(where(is.numeric)) %>% 
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "value"
  )

ggplot(main_df_log_long, aes(x = value)) +
  geom_density(na.rm = TRUE) +
  facet_wrap(~ variable, scales = "free") +
  theme_bw() +
  labs(x = NULL, y = NULL)

main_df_fac_long <- main_df %>% 
  select(where(is.factor)) %>% 
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "value"
  )

ggplot(main_df_fac_long, aes(x = value)) +
  geom_bar() +
  facet_wrap(~ variable, scales = "free_x") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = NULL, y = NULL)

5.2 Боксплоты для полного датасета

theme_custom <- theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 20, hjust = 0.5),
    plot.subtitle = element_text(size = 16, hjust = 0.5),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    panel.background = element_rect(fill = "white"), 
    panel.grid = element_blank()
  )

gender_colours <- c(
      "M" = "#8694AC",
      "F" = "#f28482"
    )

route_colours <- c(
      "IV" = "#DDB892",
      "PO" = "#BAC893"
    )

food_colours <- c(
      "fed" = "#FFB35C",
      "fasted" = "#CDE8F4"
    )

site_colours <- c(
      "capillary" = "#FE3448",
      "venous" = "#7C7F65"
    )

gender_fill <- scale_fill_manual(
    name = "Sex",
    values = gender_colours
  )

route_fill <- scale_fill_manual(
    name = "Route",
    values = route_colours
  )

food_fill <- scale_fill_manual(
    name = "Food",
    values = food_colours
  )

site_fill <- scale_fill_manual(
    name = "Site",
    values = site_colours
  )


gender_color <- scale_colour_manual(
    name = "Sex",
    values = gender_colours
  )
p1 <- main_df %>% 
  filter(!is.na(sex)) %>% 
  ggplot(aes(x = sex, y = Tmax)) +
  geom_violin(aes(fill = sex)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  gender_fill

p2 <- main_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = Tmax, fill = sex), colour = "gray40") + 
  theme_custom + 
  gender_fill

p1 + p2 + plot_annotation(title = "Distribution of Tmax by gender", theme = theme(plot.title = element_text(size = 20)))

p3 <- main_df %>% 
  filter(!is.na(sex)) %>%
  ggplot(aes(x = sex, y = Cmax)) +
  geom_violin(aes(fill = sex)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  gender_fill

p4 <-  main_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = Cmax, fill = sex), colour = "gray40") + 
  theme_custom + 
  gender_fill

p3 + p4 + plot_annotation(title = "Distribution of Cmax by gender", theme = theme(plot.title = element_text(size = 20)))

p5 <- main_df %>% 
  filter(!is.na(sex)) %>% 
  ggplot(aes(x = sex, y = AUC)) +
  geom_violin(aes(fill = sex)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  gender_fill

p6 <- main_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = AUC, fill = sex), colour = "gray40") + 
  theme_custom + 
  gender_fill

p5 + p6 + plot_annotation(title = "Distribution of AUC by gender", theme = theme(plot.title = element_text(size = 20)))

p7 <- main_df %>% 
  ggplot(aes(x = route, y = Tmax)) +
  geom_violin(aes(fill = route)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom +
  route_fill

p8 <- main_df %>% 
  ggplot()+
  geom_histogram(aes(x = Tmax, fill = route), colour = "gray40") + 
  theme_custom +
  route_fill

p7 + p8 + plot_annotation(title = "Distribution of Tmax by route", theme = theme(plot.title = element_text(size = 20)))

p9 <- main_df %>% 
  ggplot(aes(x = route, y = Cmax)) +
  geom_violin(aes(fill = route)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  route_fill

p10 <-  main_df %>% 
  ggplot()+
  geom_histogram(aes(x = Cmax, fill = route), colour = "gray40") + 
  theme_custom + 
  route_fill

p9 + p10 + plot_annotation(title = "Distribution of Cmax by route", theme = theme(plot.title = element_text(size = 20)))

p11 <- main_df %>% 
  ggplot(aes(x = route, y = AUC)) +
  geom_violin(aes(fill = route)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  route_fill

p12 <- main_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = AUC, fill = route), colour = "gray40") + 
  theme_custom + 
  route_fill

p11 + p12 + plot_annotation(title = "Distribution of AUC by route", theme = theme(plot.title = element_text(size = 20)))

p13 <- main_df %>% 
  ggplot(aes(x = food, y = Tmax)) +
  geom_violin(aes(fill = food)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom +
  food_fill

p14 <- main_df %>% 
  ggplot()+
  geom_histogram(aes(x = Tmax, fill = food), colour = "gray40") + 
  theme_custom +
  food_fill

p13 + p14 + plot_annotation(title = "Distribution of Tmax by food consumption", theme = theme(plot.title = element_text(size = 20)))

p15 <- main_df %>% 
  ggplot(aes(x = food, y = Cmax)) +
  geom_violin(aes(fill = food)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  food_fill

p16 <-  main_df %>% 
  ggplot()+
  geom_histogram(aes(x = Cmax, fill = food), colour = "gray40") + 
  theme_custom + 
  food_fill

p15 + p16 + plot_annotation(title = "Distribution of Cmax by food consumption", theme = theme(plot.title = element_text(size = 20)))

p17 <- main_df %>% 
  ggplot(aes(x = food, y = AUC)) +
  geom_violin(aes(fill = food)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  food_fill

p18 <- main_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = AUC, fill = food), colour = "gray40") + 
  theme_custom + 
  food_fill

p17 + p18 + plot_annotation(title = "Distribution of AUC by food consumption", theme = theme(plot.title = element_text(size = 20)))

p19 <- main_df %>% 
  ggplot(aes(x = site, y = Tmax)) +
  geom_violin(aes(fill = site)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom +
  site_fill

p20 <- main_df %>% 
  ggplot()+
  geom_histogram(aes(x = Tmax, fill = site), colour = "gray40") + 
  theme_custom +
  site_fill

p19 + p20 + plot_annotation(title = "Distribution of Tmax by blood sampling", theme = theme(plot.title = element_text(size = 20)))

p21 <- main_df %>% 
  ggplot(aes(x = site, y = Cmax)) +
  geom_violin(aes(fill = site)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  site_fill

p22 <-  main_df %>% 
  ggplot()+
  geom_histogram(aes(x = Cmax, fill = site), colour = "gray40") + 
  theme_custom + 
  site_fill

p21 + p22 + plot_annotation(title = "Distribution of Cmax by blood sampling", theme = theme(plot.title = element_text(size = 20)))

p23 <- main_df %>% 
  ggplot(aes(x = site, y = AUC)) +
  geom_violin(aes(fill = site)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  site_fill

p24 <-  main_df %>% 
  ggplot()+
  geom_histogram(aes(x = AUC, fill = site), colour = "gray40") + 
  theme_custom + 
  site_fill

p23 + p24 + plot_annotation(title = "Distribution of AUC by blood sampling", theme = theme(plot.title = element_text(size = 20)))

5.3 Боксплоты для датасета order_0

p25 <- order_0_df %>% 
  ggplot(aes(x = route, y = Cmax)) +
  geom_violin(aes(fill = route)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  route_fill

p26 <-  order_0_df %>% 
  ggplot()+
  geom_histogram(aes(x = Cmax, fill = route), colour = "gray40") + 
  theme_custom + 
  route_fill

p25 + p26 + plot_annotation(title = "Distribution of Cmax by route", theme = theme(plot.title = element_text(size = 20)))

p27 <- order_0_df %>% 
  ggplot(aes(x = route, y = AUC)) +
  geom_violin(aes(fill = route)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  route_fill

p28 <- order_0_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = AUC, fill = route), colour = "gray40") + 
  theme_custom + 
  route_fill

p27 + p28 + plot_annotation(title = "Distribution of AUC by route", theme = theme(plot.title = element_text(size = 20)))

p29 <- order_0_df %>% 
  ggplot(aes(x = food, y = Cmax)) +
  geom_violin(aes(fill = food)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  food_fill

p30 <-  order_0_df %>% 
  ggplot()+
  geom_histogram(aes(x = Cmax, fill = food), colour = "gray40") + 
  theme_custom + 
  food_fill

p29 + p30 + plot_annotation(title = "Distribution of Cmax by food consumption", theme = theme(plot.title = element_text(size = 20)))

p31 <- order_0_df %>% 
  ggplot(aes(x = food, y = AUC)) +
  geom_violin(aes(fill = food)) +
  geom_boxplot(width = 0.1, color = "black", alpha = 0.5)+
  geom_jitter(color = "gray40", alpha = 0.5, size = 1.5)+
  theme(
    legend.position = "none"
  ) + 
  theme_custom + 
  food_fill

p32 <- order_0_df %>% 
  filter(!is.na(sex)) %>%
  ggplot()+
  geom_histogram(aes(x = AUC, fill = food), colour = "gray40") + 
  theme_custom + 
  food_fill

p31 + p32 + plot_annotation(title = "Distribution of AUC by food consumption", theme = theme(plot.title = element_text(size = 20)))

pk_num <- main_df %>% 
  select(where(is.numeric)) %>% 
  select(-lbm_kg, -bsa_m2, -bh_cm, -bmi) #лишние убрал


cor_mat <- cor(pk_num, use = "pairwise.complete.obs")

cor_mat %>% 
  ggcorr(., label = TRUE)

Проверяем гипотезы о различиях групп по категориям food и site

H0: mean (fasted) = mean(fed) H1: mean (fasted) != mean(fed)

t.test(log(AUC) ~ food, data = order_0_df) 
## 
##  Welch Two Sample t-test
## 
## data:  log(AUC) by food
## t = 9.009, df = 48.616, p-value = 6.134e-12
## alternative hypothesis: true difference in means between group fasted and group fed is not equal to 0
## 95 percent confidence interval:
##  0.4552847 0.7167806
## sample estimates:
## mean in group fasted    mean in group fed 
##             5.264317             4.678284
t.test(log(Cmax) ~ food, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  log(Cmax) by food
## t = 10.867, df = 84.518, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group fasted and group fed is not equal to 0
## 95 percent confidence interval:
##  0.4542268 0.6576812
## sample estimates:
## mean in group fasted    mean in group fed 
##            0.1532168           -0.4027372
t.test(Tmax ~ food, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Tmax by food
## t = -1.2786, df = 93.904, p-value = 0.2042
## alternative hypothesis: true difference in means between group fasted and group fed is not equal to 0
## 95 percent confidence interval:
##  -16.571704   3.589098
## sample estimates:
## mean in group fasted    mean in group fed 
##             62.70968             69.20098
t.test(Beta ~ food, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Beta by food
## t = -0.18184, df = 39.468, p-value = 0.8566
## alternative hypothesis: true difference in means between group fasted and group fed is not equal to 0
## 95 percent confidence interval:
##  -0.0002730811  0.0002280155
## sample estimates:
## mean in group fasted    mean in group fed 
##          0.002169623          0.002192156
t.test(C_0_0_order ~ food, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  C_0_0_order by food
## t = 4.225, df = 37.009, p-value = 0.0001495
## alternative hypothesis: true difference in means between group fasted and group fed is not equal to 0
## 95 percent confidence interval:
##  0.09480385 0.26952408
## sample estimates:
## mean in group fasted    mean in group fed 
##            0.9811502            0.7989862
t.test(V_d_0_order ~ food, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  V_d_0_order by food
## t = 1.5073, df = 46.389, p-value = 0.1385
## alternative hypothesis: true difference in means between group fasted and group fed is not equal to 0
## 95 percent confidence interval:
##  -0.009793554  0.068237153
## sample estimates:
## mean in group fasted    mean in group fed 
##            0.6772378            0.6480160

H0: mean (capillary) = mean(venous) H1: mean (capillary) != mean(venous)

t.test(log(AUC) ~ site, data = order_0_df) 
## 
##  Welch Two Sample t-test
## 
## data:  log(AUC) by site
## t = 9.009, df = 48.616, p-value = 6.134e-12
## alternative hypothesis: true difference in means between group capillary and group venous is not equal to 0
## 95 percent confidence interval:
##  0.4552847 0.7167806
## sample estimates:
## mean in group capillary    mean in group venous 
##                5.264317                4.678284
t.test(log(Cmax) ~ site, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  log(Cmax) by site
## t = -4.4454, df = 112.65, p-value = 2.065e-05
## alternative hypothesis: true difference in means between group capillary and group venous is not equal to 0
## 95 percent confidence interval:
##  -0.3673048 -0.1408344
## sample estimates:
## mean in group capillary    mean in group venous 
##              -0.1224289               0.1316407
t.test(Tmax ~ site, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Tmax by site
## t = -1.2786, df = 93.904, p-value = 0.2042
## alternative hypothesis: true difference in means between group capillary and group venous is not equal to 0
## 95 percent confidence interval:
##  -16.571704   3.589098
## sample estimates:
## mean in group capillary    mean in group venous 
##                62.70968                69.20098
t.test(Beta ~ site, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Beta by site
## t = -0.18184, df = 39.468, p-value = 0.8566
## alternative hypothesis: true difference in means between group capillary and group venous is not equal to 0
## 95 percent confidence interval:
##  -0.0002730811  0.0002280155
## sample estimates:
## mean in group capillary    mean in group venous 
##             0.002169623             0.002192156
t.test(C_0_0_order ~ site, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  C_0_0_order by site
## t = 4.225, df = 37.009, p-value = 0.0001495
## alternative hypothesis: true difference in means between group capillary and group venous is not equal to 0
## 95 percent confidence interval:
##  0.09480385 0.26952408
## sample estimates:
## mean in group capillary    mean in group venous 
##               0.9811502               0.7989862
t.test(V_d_0_order ~ site, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  V_d_0_order by site
## t = 1.5073, df = 46.389, p-value = 0.1385
## alternative hypothesis: true difference in means between group capillary and group venous is not equal to 0
## 95 percent confidence interval:
##  -0.009793554  0.068237153
## sample estimates:
## mean in group capillary    mean in group venous 
##               0.6772378               0.6480160

Проверяем гипотезу о различиях групп по категориям route и sex.

t.test(Beta ~ route, data = order_0_df) 
## 
##  Welch Two Sample t-test
## 
## data:  Beta by route
## t = -3.952, df = 22.053, p-value = 0.0006754
## alternative hypothesis: true difference in means between group IV and group PO is not equal to 0
## 95 percent confidence interval:
##  -0.0007806703 -0.0002433704
## sample estimates:
## mean in group IV mean in group PO 
##      0.001761587      0.002273607
t.test(Beta ~ sex, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Beta by sex
## t = 4.5937, df = 4.4652, p-value = 0.007747
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.0003360822 0.0012656766
## sample estimates:
## mean in group F mean in group M 
##     0.002982604     0.002181725
t.test(V_d_0_order ~ route, data = order_0_df) 
## 
##  Welch Two Sample t-test
## 
## data:  V_d_0_order by route
## t = -4.1909, df = 29.494, p-value = 0.0002314
## alternative hypothesis: true difference in means between group IV and group PO is not equal to 0
## 95 percent confidence interval:
##  -0.10884915 -0.03748701
## sample estimates:
## mean in group IV mean in group PO 
##        0.6074394        0.6806074
t.test(V_d_0_order ~ sex, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  V_d_0_order by sex
## t = -3.2625, df = 4.6626, p-value = 0.02478
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.17872778 -0.01926724
## sample estimates:
## mean in group F mean in group M 
##       0.5752923       0.6742898
t.test(log(AUC) ~ route, data = order_0_df) 
## 
##  Welch Two Sample t-test
## 
## data:  log(AUC) by route
## t = -7.8695, df = 25.189, p-value = 3.004e-08
## alternative hypothesis: true difference in means between group IV and group PO is not equal to 0
## 95 percent confidence interval:
##  -0.7934900 -0.4644088
## sample estimates:
## mean in group IV mean in group PO 
##         4.545742         5.174692
t.test(log(AUC) ~ sex, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  log(AUC) by sex
## t = -4.0071, df = 4.3155, p-value = 0.01379
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -1.1037583 -0.2154553
## sample estimates:
## mean in group F mean in group M 
##        4.564367        5.223974
t.test(Cmax ~ route, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Cmax by route
## t = 7.9694, df = 80.102, p-value = 9.216e-12
## alternative hypothesis: true difference in means between group IV and group PO is not equal to 0
## 95 percent confidence interval:
##  0.3849115 0.6411207
## sample estimates:
## mean in group IV mean in group PO 
##        1.3830588        0.8700427
t.test(Cmax ~ sex, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Cmax by sex
## t = -5.4861, df = 5.6395, p-value = 0.001876
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.7970139 -0.3000288
## sample estimates:
## mean in group F mean in group M 
##        0.668000        1.216521
t.test(Tmax ~ route, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Tmax by route
## t = 2.5439, df = 24.323, p-value = 0.01772
## alternative hypothesis: true difference in means between group IV and group PO is not equal to 0
## 95 percent confidence interval:
##   3.588895 34.338455
## sample estimates:
## mean in group IV mean in group PO 
##         80.41667         61.45299
t.test(Tmax ~ sex, data = order_0_df)
## 
##  Welch Two Sample t-test
## 
## data:  Tmax by sex
## t = -0.032711, df = 4.5118, p-value = 0.9753
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -43.19236  42.14162
## sample estimates:
## mean in group F mean in group M 
##        63.40000        63.92537

Результаты: Группы по способу введения различаются по всем фк параметрам. Группы по полу различаются по всем фк-параметрам кроме Tmax.

6. Основной регрессионный анализ

6.1. Построение регрессионных моделей по AUC

Аддитивная модель c основными переменными (доза, способ введения, способ забора крови)

log(AUC) ~ dose_g_per_kg + route + site

fit_AUC_mod <- lm(log(AUC) ~ dose_g_per_kg + route + site, order_0_df) #+food
fit_AUC_submod <- lm(log(AUC) ~ 1, order_0_df)

#broom::tidy(fit_AUC_mod, conf.int = TRUE)
#broom::tidy(fit_AUC_submod, conf.int = TRUE)

summ(fit_AUC_mod, confint = TRUE, digits = 3)
Observations 96 (50 missing obs. deleted)
Dependent variable log(AUC)
Type OLS linear regression
F(3,92) 102.707
0.770
Adj. R² 0.763
Est. 2.5% 97.5% t val. p
(Intercept) 4.167 3.874 4.461 28.200 0.000
dose_g_per_kg 1.544 0.934 2.155 5.021 0.000
routePO 0.085 -0.093 0.262 0.947 0.346
sitevenous -0.342 -0.441 -0.243 -6.875 0.000
Standard errors: OLS
#summary(fit_AUC_mod)
#summary(fit_AUC_submod)

anova(fit_AUC_mod, fit_AUC_submod)
## Analysis of Variance Table
## 
## Model 1: log(AUC) ~ dose_g_per_kg + route + site
## Model 2: log(AUC) ~ 1
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     92  3.3115                                  
## 2     95 14.4023 -3   -11.091 102.71 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
check_model(fit_AUC_mod)

ggpredict(fit_AUC_mod, terms = c("dose_g_per_kg", "route", "site"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared:  0.7626")
## Model has log-transformed response. Back-transforming predictions to
##   original response scale. Standard errors are still on the transformed
##   scale.

plot_coefs(fit_AUC_mod, omit.coefs = NULL, scale = TRUE, ci_level = 0.95, plot.distributions = FALSE, inner_ci_level = .1, colors = "CUD") + labs(title = "Regression coeffecients with 95% CI")

Аддитивная модель с дополнительными переменными

log(AUC) ~ dose_g_per_kg + route + site + sex

fit_AUC_mod_add <- lm(log(AUC) ~ dose_g_per_kg + route + site + sex, order_0_df) #age не добавить

summ(fit_AUC_mod, confint = TRUE, digits = 3)
Observations 96 (50 missing obs. deleted)
Dependent variable log(AUC)
Type OLS linear regression
F(3,92) 102.707
0.770
Adj. R² 0.763
Est. 2.5% 97.5% t val. p
(Intercept) 4.167 3.874 4.461 28.200 0.000
dose_g_per_kg 1.544 0.934 2.155 5.021 0.000
routePO 0.085 -0.093 0.262 0.947 0.346
sitevenous -0.342 -0.441 -0.243 -6.875 0.000
Standard errors: OLS
summ(fit_AUC_mod_add, confint = TRUE, digits = 3)
Observations 72 (74 missing obs. deleted)
Dependent variable log(AUC)
Type OLS linear regression
F(4,67) 39.615
0.703
Adj. R² 0.685
Est. 2.5% 97.5% t val. p
(Intercept) 3.144 2.407 3.880 8.523 0.000
dose_g_per_kg 3.154 1.797 4.510 4.640 0.000
routePO 0.116 -0.080 0.311 1.180 0.242
sitevenous -0.341 -0.535 -0.147 -3.512 0.001
sexM -0.068 -0.311 0.175 -0.556 0.580
Standard errors: OLS
# summary(fit_AUC_mod)
# summary(fit_AUC_mod_add)


#anova(fit_AUC_mod, fit_AUC_mod_add) для sex не работает anova
check_model(fit_AUC_mod_add)

ggpredict(fit_AUC_mod_add, terms = c("dose_g_per_kg", "route", "site", "sex")) %>% plot(show_data = TRUE, log_y = F, jitter = .01) #+ labs(subtitle = "Adjusted R-squared:  0.7626")
## Model has log-transformed response. Back-transforming predictions to
##   original response scale. Standard errors are still on the transformed
##   scale.

Модель с взаимодействиями

log(AUC) ~ dose_g_per_kg*route + site

Логично, что AUC будет зависеть от комбинации дозы и способа введения.

fit_AUC_mod_int <- lm(log(AUC) ~ dose_g_per_kg*route + site, order_0_df)
# fit_AUC_mod_int2 <- lm(log(AUC) ~ dose_g_per_kg + route*site, order_0_df)
# fit_AUC_mod_int3 <- lm(log(AUC) ~ dose_g_per_kg*site + route, order_0_df)
# fit_AUC_mod_int4 <- lm(log(AUC) ~ dose_g_per_kg*site*route, order_0_df)


summ(fit_AUC_mod_int, confint = TRUE, digits = 3)
Observations 96 (50 missing obs. deleted)
Dependent variable log(AUC)
Type OLS linear regression
F(4,91) 95.676
0.808
Adj. R² 0.799
Est. 2.5% 97.5% t val. p
(Intercept) 4.520 4.204 4.837 28.365 0.000
dose_g_per_kg 0.672 -0.023 1.367 1.921 0.058
routePO -1.079 -1.649 -0.509 -3.762 0.000
sitevenous -0.358 -0.449 -0.266 -7.791 0.000
dose_g_per_kg:routePO 2.107 1.118 3.096 4.233 0.000
Standard errors: OLS
anova(fit_AUC_mod_int, fit_AUC_submod)
## Analysis of Variance Table
## 
## Model 1: log(AUC) ~ dose_g_per_kg * route + site
## Model 2: log(AUC) ~ 1
##   Res.Df     RSS Df Sum of Sq      F    Pr(>F)    
## 1     91  2.7667                                  
## 2     95 14.4023 -4   -11.636 95.676 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
check_model(fit_AUC_mod_int)

ggpredict(fit_AUC_mod_int, terms = c("dose_g_per_kg", "route", "site"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared:  0.799") + xlab("Dose (g per kg)")
## Model has log-transformed response. Back-transforming predictions to
##   original response scale. Standard errors are still on the transformed
##   scale.

plot_coefs(fit_AUC_mod_int, omit.coefs = NULL, scale = TRUE, ci_level = 0.95, plot.distributions = FALSE, inner_ci_level = .1, colors = "CUD") + labs(title = "Regression coeffecients with 95% CI")

plot_model(fit_AUC_mod_int, show.values = TRUE,  show.intercept = TRUE, value.offset = 0.3,  axis.labels = c("Dose (g per kg)", "Route (PO)", "Blood sampling (venous)", "Dose (g per kg) x Route (PO)", "Intercept"), title = "Regression coeffecients with 95% CI") +
  theme_nice()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
##   Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Выводы:

  1. Включение взаимодействия между dose_g_per_kg и route увеличивает \(R^2\) примерно на 3%.

6.2. Построение регрессионных моделей по Tmax

order_0_df <- order_0_df %>% 
  mutate(bmi = ifelse(is.na(bmi) & !is.na(wt_kg) & !is.na(bh_cm),
                      wt_kg/(bh_cm/100)^2, bmi),
         bsa_m2 = ifelse(is.na(bsa_m2) & !is.na(wt_kg) & !is.na(bh_cm),
                         sqrt((bh_cm * wt_kg)/3600), bsa_m2))

t_max_full_m <- lm(Tmax ~ dose_g_per_kg * food + route + wt_kg, data = order_0_df)

summary(t_max_full_m)
## 
## Call:
## lm(formula = Tmax ~ dose_g_per_kg * food + route + wt_kg, data = order_0_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.900 -10.838   1.341   4.782  42.309 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            253.9999   102.2243   2.485  0.02303 * 
## dose_g_per_kg         -165.4839   145.4369  -1.138  0.27011   
## foodfed               -247.9988   129.1429  -1.920  0.07080 . 
## routePO                -48.0073    12.3665  -3.882  0.00109 **
## wt_kg                   -0.4782     0.4216  -1.134  0.27153   
## dose_g_per_kg:foodfed  433.3209   221.8195   1.953  0.06648 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.64 on 18 degrees of freedom
##   (122 пропущенных наблюдений удалены)
## Multiple R-squared:  0.6062, Adjusted R-squared:  0.4969 
## F-statistic: 5.542 on 5 and 18 DF,  p-value: 0.002916
check_model(t_max_full_m)

ggResidpanel::resid_xpanel(t_max_full_m)

ggResidpanel::resid_panel(t_max_full_m, plots = c("lev", "cookd"))
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggResidpanel package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Оценка коэффициентов модели, используя сэндвич-эстиматор

t_max_coef <- coeftest(t_max_full_m, vcov. = vcovHC, type = "HC3") %>%
broom::tidy(conf.int = TRUE) %>% 
  mutate(term = c("Intercept",
                  "Dose (g/kg)",
                  "Food status: \"Fed\"",
                  "Peroral route",
                  "Weight, kg",
                  "Interaction between\ndose and status \"fed\"")) %>% 
  rename("Variable" = term,
         "β estimate" = estimate,
         "Standard error" = std.error,
         "Wald t-test statistics" = statistic,
         "P-value" = p.value,
         "95%CI low" = conf.low,
         "95%CI high" = conf.high)
t_max_coef %>% 
  mutate(across(where(is.numeric), ~round(.x, 3))) %>% 
  flextable()

Variable

β estimate

Standard error

Wald t-test statistics

P-value

95%CI low

95%CI high

Intercept

254.000

162.510

1.563

0.135

-87.421

595.421

Dose (g/kg)

-165.484

233.769

-0.708

0.488

-656.615

325.647

Food status: "Fed"

-247.999

211.833

-1.171

0.257

-693.044

197.046

Peroral route

-48.007

14.871

-3.228

0.005

-79.249

-16.765

Weight, kg

-0.478

0.675

-0.709

0.488

-1.896

0.939

Interaction between
dose and status "fed"

433.321

353.804

1.225

0.236

-309.993

1,176.635

t_max_prediction_plot <- fortify(t_max_full_m)
## Warning: `fortify(<lm>)` was deprecated in ggplot2 3.6.0.
## ℹ Please use `broom::augment(<lm>)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(t_max_prediction_plot, aes(x = .fitted, y = Tmax)) +
  geom_point(size = 2) +
  ylim(c(25,130)) +
  xlim(c(25,130)) +
  geom_abline(slope = 1, intercept = 0, color = "skyblue3",
              linetype = "dashed", linewidth = 2) +
  geom_text(x = 75, y = 130, label = "Adjusted R-squared = 0.497", size = 8) +
  labs(title = "Predicted vs observed Tmax values",
       x = "Fitted values of Tmax, min",
       y = "Observed values of Tmax, min")

t_max_coef %>% 
ggplot(aes(y = `Variable`)) +
  geom_point(aes(x = `β estimate`), size = 3) +
  geom_errorbar(aes(xmin = `95%CI low`, xmax = `95%CI high`), width = 0.5) +
  geom_vline(xintercept = 0, color = "darkred") +
  labs(title = "Coefficients and 95%CI in Tmax model")

6.3. Построение регрессионных моделей по Cmax

log(Cmax) ~ dose_g_per_kg + route + food + site

order_0_df_factor <- order_0_df %>% 
  mutate(
    route = factor(route),
    food = factor(food),
    site = factor(site)
  )

Cmax_lm2_v1 <- lm(log(Cmax) ~ dose_g_per_kg + route + food + site, data = order_0_df_factor)

summary(Cmax_lm2_v1)
## 
## Call:
## lm(formula = log(Cmax) ~ dose_g_per_kg + route + food + site, 
##     data = order_0_df_factor)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48366 -0.10779  0.01501  0.10903  0.36264 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.60571    0.12810  -4.728 5.43e-06 ***
## dose_g_per_kg  0.57351    0.26664   2.151   0.0332 *  
## routePO        0.11544    0.07734   1.493   0.1378    
## foodfed       -0.94209    0.07201 -13.083  < 2e-16 ***
## sitevenous     0.77382    0.06083  12.720  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1645 on 141 degrees of freedom
## Multiple R-squared:  0.8369, Adjusted R-squared:  0.8322 
## F-statistic: 180.8 on 4 and 141 DF,  p-value: < 2.2e-16
check_model(Cmax_lm2_v1)

broom::tidy(Cmax_lm2_v1, conf.int = TRUE)
## # A tibble: 5 × 7
##   term          estimate std.error statistic  p.value conf.low conf.high
##   <chr>            <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept)     -0.606    0.128      -4.73 5.43e- 6  -0.859     -0.352
## 2 dose_g_per_kg    0.574    0.267       2.15 3.32e- 2   0.0464     1.10 
## 3 routePO          0.115    0.0773      1.49 1.38e- 1  -0.0375     0.268
## 4 foodfed         -0.942    0.0720    -13.1  4.18e-26  -1.08      -0.800
## 5 sitevenous       0.774    0.0608     12.7  3.61e-25   0.654      0.894
#анализе линейности модели по графику Residuals vs Fitted 
autoplot(Cmax_lm2_v1, 1, ncol = 1)

# Диагностика линейности пакетом ggResidpanel
resid_panel(Cmax_lm2_v1, plots = "resid", smoother = TRUE)
## `geom_smooth()` using formula = 'y ~ x'

# Диагностика на гомоскедастичность остатков с помощью графика Scale-Location
autoplot(Cmax_lm2_v1, 3, ncol = 1)

# Диагностика нормальности распределения остатков с помощью графика Normal Q-Q:
resid_panel(Cmax_lm2_v1, plots = c("qq", "hist"))

# Диагностика на наличие выбросов и вилятельных наблюдений с помощью графика  Residuals vs Leverage и графика расстояний Кука
resid_panel(Cmax_lm2_v1, plots = c("lev", "cookd"))

log(Cmax) ~ dose_g_per_kg + route + food + site + sex + age

order_0_df_factor <- order_0_df %>% 
  mutate(
    route = factor(route),
    food = factor(food),
    site = factor(site),
    sex = factor(sex)
  )

Cmax_lm2_v2 <- lm(log(Cmax) ~ dose_g_per_kg + route + food + sex + age, data = order_0_df_factor)

summary(Cmax_lm2_v2)
## 
## Call:
## lm(formula = log(Cmax) ~ dose_g_per_kg + route + food + sex + 
##     age, data = order_0_df_factor)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33748 -0.07599 -0.00931  0.11259  0.29027 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.149688   0.490670   0.305  0.76134    
## dose_g_per_kg -0.012206   0.953701  -0.013  0.98983    
## routePO       -0.507512   0.075700  -6.704 6.97e-09 ***
## foodfed       -0.286139   0.093412  -3.063  0.00324 ** 
## sexM           0.087738   0.119204   0.736  0.46449    
## age            0.004959   0.001153   4.301 6.13e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1523 on 62 degrees of freedom
##   (78 пропущенных наблюдений удалены)
## Multiple R-squared:  0.8574, Adjusted R-squared:  0.8459 
## F-statistic: 74.58 on 5 and 62 DF,  p-value: < 2.2e-16
#check_model(Cmax_lm2_v1)

broom::tidy(Cmax_lm2_v2, conf.int = TRUE)
## # A tibble: 6 × 7
##   term          estimate std.error statistic       p.value conf.low conf.high
##   <chr>            <dbl>     <dbl>     <dbl>         <dbl>    <dbl>     <dbl>
## 1 (Intercept)    0.150     0.491      0.305  0.761         -0.831     1.13   
## 2 dose_g_per_kg -0.0122    0.954     -0.0128 0.990         -1.92      1.89   
## 3 routePO       -0.508     0.0757    -6.70   0.00000000697 -0.659    -0.356  
## 4 foodfed       -0.286     0.0934    -3.06   0.00324       -0.473    -0.0994 
## 5 sexM           0.0877    0.119      0.736  0.464         -0.151     0.326  
## 6 age            0.00496   0.00115    4.30   0.0000613      0.00265   0.00726
#анализе линейности модели по графику Residuals vs Fitted 
autoplot(Cmax_lm2_v2, 1, ncol = 1)

# Диагностика линейности пакетом ggResidpanel
resid_panel(Cmax_lm2_v2, plots = "resid", smoother = TRUE)
## `geom_smooth()` using formula = 'y ~ x'

# Диагностика на гомоскедастичность остатков с помощью графика Scale-Location
autoplot(Cmax_lm2_v2, 3, ncol = 1)

# Диагностика нормальности распределения остатков с помощью графика Normal Q-Q:
resid_panel(Cmax_lm2_v2, plots = c("qq", "hist"))

# Диагностика на наличие выбросов и вилятельных наблюдений с помощью графика  Residuals vs Leverage и графика расстояний Кука
resid_panel(Cmax_lm2_v2, plots = c("lev", "cookd"))

ggpredict(Cmax_lm2_v2, terms = c("age", "route"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared:  0.8459")
## Model has log-transformed response. Back-transforming predictions to
##   original response scale. Standard errors are still on the transformed
##   scale.

plot_model(Cmax_lm2_v2, show.values = TRUE,  show.intercept = TRUE, value.offset = 0.3, digits = 3,title = "Regression coeffecients with 95% CI") +
  geom_hline(yintercept = 0, color = "gray3", size = 0.5) +
  theme_nice()

log(Cmax) ~ dose_g_per_kg + route + food + site + sex + age + bsa_m2 + lbm_kg

Cmax_lm2_v3 <- lm(log(Cmax) ~ dose_g_per_kg + age + bsa_m2 + lbm_kg, data = order_0_df_factor)

summary(Cmax_lm2_v2)
## 
## Call:
## lm(formula = log(Cmax) ~ dose_g_per_kg + route + food + sex + 
##     age, data = order_0_df_factor)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33748 -0.07599 -0.00931  0.11259  0.29027 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.149688   0.490670   0.305  0.76134    
## dose_g_per_kg -0.012206   0.953701  -0.013  0.98983    
## routePO       -0.507512   0.075700  -6.704 6.97e-09 ***
## foodfed       -0.286139   0.093412  -3.063  0.00324 ** 
## sexM           0.087738   0.119204   0.736  0.46449    
## age            0.004959   0.001153   4.301 6.13e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1523 on 62 degrees of freedom
##   (78 пропущенных наблюдений удалены)
## Multiple R-squared:  0.8574, Adjusted R-squared:  0.8459 
## F-statistic: 74.58 on 5 and 62 DF,  p-value: < 2.2e-16
#check_model(Cmax_lm2_v1)

broom::tidy(Cmax_lm2_v2, conf.int = TRUE)
## # A tibble: 6 × 7
##   term          estimate std.error statistic       p.value conf.low conf.high
##   <chr>            <dbl>     <dbl>     <dbl>         <dbl>    <dbl>     <dbl>
## 1 (Intercept)    0.150     0.491      0.305  0.761         -0.831     1.13   
## 2 dose_g_per_kg -0.0122    0.954     -0.0128 0.990         -1.92      1.89   
## 3 routePO       -0.508     0.0757    -6.70   0.00000000697 -0.659    -0.356  
## 4 foodfed       -0.286     0.0934    -3.06   0.00324       -0.473    -0.0994 
## 5 sexM           0.0877    0.119      0.736  0.464         -0.151     0.326  
## 6 age            0.00496   0.00115    4.30   0.0000613      0.00265   0.00726
#анализе линейности модели по графику Residuals vs Fitted 
autoplot(Cmax_lm2_v2, 1, ncol = 1)

# Диагностика линейности пакетом ggResidpanel
resid_panel(Cmax_lm2_v2, plots = "resid", smoother = TRUE)
## `geom_smooth()` using formula = 'y ~ x'

# Диагностика на гомоскедастичность остатков с помощью графика Scale-Location
autoplot(Cmax_lm2_v2, 3, ncol = 1)

# Диагностика нормальности распределения остатков с помощью графика Normal Q-Q:
resid_panel(Cmax_lm2_v2, plots = c("qq", "hist"))

# Диагностика на наличие выбросов и вилятельных наблюдений с помощью графика  Residuals vs Leverage и графика расстояний Кука
resid_panel(Cmax_lm2_v2, plots = c("lev", "cookd"))